`include  "defines.v"
// -----------------------------------------------------------------------------
// Dcache : blocking, write-back data cache.
//
// Organisation, as implemented below:
//   * 4 ways. Each way owns one SramPtagBankx4 and one SramDataBankx4, i.e.
//     4 banks x 64 sets of 512-bit lines with an 18-bit tag per line.
//   * Address split: [5:3]  64-bit word inside the line
//                    [11:6] set index
//                    [13:12] bank select
//                    [`PADDR_W-1:14] tag (assumes `PADDR_W == 32 so that the
//                    tag is 18 bits wide - TODO confirm against defines.v)
//   * Valid bits, dirty bits and the 3-bit tree-PLRU state are flip-flops.
//
// Main FSM :  IDLE -> CHECK -> IDLE                      (hit)
//                           -> READ -> REFILL -> IDLE    (miss, clean victim)
//                           -> WRITE -> READ -> ...      (miss, dirty victim)
// Flush FSM:  started by lsu_dcache_invalidate_i. Walks flushCnt over
//             {way, bank, index}, writes every valid+dirty line back to
//             memory, then pulses invalidateOK for one cycle, which clears all
//             valid and dirty bits and is reported to the LSU as data_valid.
// -----------------------------------------------------------------------------
module Dcache (
  input clk,
  input rst_n,
  input invalidate, // synchronously clears every PLRU entry
  // LSU-facing interface
  input                   lsu_dcache_addr_valid_i,
  input  [`PADDR_W-1:0]   lsu_dcache_addr_i,
  input  [7:0]            lsu_dcache_strb_i,
  input                   lsu_dcache_wen_i,
  input                   lsu_dcache_invalidate_i,
  input  [63:0]           lsu_dcache_wdata_i,
  output [63:0]           dcache_lsu_rdata_o,
  output                  dcache_lsu_data_valid_o,
  // downstream interface (L2 cache or memory)
  output                  dcache_mem_addr_valid_o,
  output                  dcache_mem_wen_o,
  output [`PADDR_W-1:0]   dcache_mem_addr_o,
  output [511:0]          dcache_mem_wdata_o,
  input                   mem_dcache_data_valid_i,
  input [511:0]           mem_dcache_rdata_i
);
// ---------------------------------------------------------------------------
// State encodings
// ---------------------------------------------------------------------------
// main state machine
parameter IDLE    = 4'd1,
          CHECK   = 4'd2,
          READ    = 4'd3,
          WRITE   = 4'd4,
          REFILL  = 4'd5;
reg  [3:0] cur_state;
reg  [3:0] nxt_state;

// flush state machine (one-hot)
parameter FLUSH_IDLE  = 3'b001,
          FLUSH_READ  = 3'b010,
          FLUSH_WRITE = 3'b100;
reg  [2:0] cur_flush_state;
reg  [2:0] nxt_flush_state;

// ---------------------------------------------------------------------------
// Meta data: one valid bit and one dirty bit per way per {bank,index} entry,
// plus one 3-bit PLRU state per {bank,index} entry.
// ---------------------------------------------------------------------------
reg [0:64*4-1] meta0V  ;reg [0:64*4-1] meta0D ;
reg [0:64*4-1] meta1V  ;reg [0:64*4-1] meta1D ;
reg [0:64*4-1] meta2V  ;reg [0:64*4-1] meta2D ;
reg [0:64*4-1] meta3V  ;reg [0:64*4-1] meta3D ;
reg [2:0] plru[0:64*4-1];

// ---------------------------------------------------------------------------
// SRAM side signals. Second array index is the way; the numeric suffix of
// PtagOutN / DataOutN is the bank.
// TODO (from the original author): these control signals must become
// finer-grained when the cache is upgraded to a non-blocking design.
// ---------------------------------------------------------------------------
wire [1:0]   bankSel;
wire [0:3]   sram_wen   ;
wire [5:0]   sram_addr  ;
wire [511:0] sram_wmask ;
wire [511:0] sram_din   ;
wire [17:0]  PtagIn     ;
wire [17:0]  PtagOut0   [0:3];
wire [17:0]  PtagOut1   [0:3];
wire [17:0]  PtagOut2   [0:3];
wire [17:0]  PtagOut3   [0:3];
wire [511:0] DataOut0   [0:3];
wire [511:0] DataOut1   [0:3];
wire [511:0] DataOut2   [0:3];
wire [511:0] DataOut3   [0:3];
wire [511:0] DataOut    [0:3];
wire [17:0]  PtagOut    [0:3];

// ---------------------------------------------------------------------------
// Flush (invalidate) signals
// ---------------------------------------------------------------------------
wire flushUse;
wire flush_addr_valid;
wire flush_wen;
wire [`PADDR_W-1:0] flush_addr;
wire [511:0] flush_wdata;
wire flush_resp_valid;
wire needWB;
wire curMetaV;
wire curMetaD;
wire [17:0] curMetaTag;
reg  [6+2+2:0] flushCnt; // {done, way[1:0], bank[1:0], index[5:0]}
wire [1:0] flushWay;
// Explicitly 2 bits wide. Without this declaration the net would be an
// implicit 1-bit wire and the flush walk would never reach banks 2 and 3.
wire [1:0] flushBank;
wire [5:0] flushIndex;
wire [5:0] flushSramAddr;
wire [7:0] flushBankSelIndex;

// ---------------------------------------------------------------------------
// Refill bypass
// ---------------------------------------------------------------------------
wire refill_bypass;
// Original author's note: an SRAM access takes two cycles, so the bypass is
// held for (at least) two cycles as well.
reg refill_bypassR ;

// ---------------------------------------------------------------------------
// Lookup path. index / bankSel are forced to zero while the LSU address is
// not valid.
// ---------------------------------------------------------------------------
wire [5:0] index  = {6{lsu_dcache_addr_valid_i}} & lsu_dcache_addr_i[11:6];
assign bankSel    = {2{lsu_dcache_addr_valid_i}} & lsu_dcache_addr_i[13:12];
wire [7:0] bankSelIndex = {bankSel,index};
wire [0:3] hit_vector;

// Bank used to pick the SRAM read data. While the flush FSM owns the SRAM
// address (flushUse) the data must come from the bank being flushed, otherwise
// the tag/data written back would belong to whatever bank the LSU address
// happens to select.
wire [1:0] rdBankSel = flushUse ? flushBank : bankSel;
// The SRAMs must also be enabled while the flush FSM reads them, even if the
// LSU is not presenting a valid address at that moment.
wire       sram_en   = lsu_dcache_addr_valid_i | flushUse;

// Per-way bank multiplexers for tag and data.
assign PtagOut[0] = rdBankSel[1] ? (rdBankSel[0] ? PtagOut3[0]:PtagOut2[0]) : (rdBankSel[0] ? PtagOut1[0] : PtagOut0[0] );
assign PtagOut[1] = rdBankSel[1] ? (rdBankSel[0] ? PtagOut3[1]:PtagOut2[1]) : (rdBankSel[0] ? PtagOut1[1] : PtagOut0[1] );
assign PtagOut[2] = rdBankSel[1] ? (rdBankSel[0] ? PtagOut3[2]:PtagOut2[2]) : (rdBankSel[0] ? PtagOut1[2] : PtagOut0[2] );
assign PtagOut[3] = rdBankSel[1] ? (rdBankSel[0] ? PtagOut3[3]:PtagOut2[3]) : (rdBankSel[0] ? PtagOut1[3] : PtagOut0[3] );

assign DataOut[0] = rdBankSel[1] ? (rdBankSel[0] ? DataOut3[0]:DataOut2[0]) : (rdBankSel[0] ? DataOut1[0] : DataOut0[0] );
assign DataOut[1] = rdBankSel[1] ? (rdBankSel[0] ? DataOut3[1]:DataOut2[1]) : (rdBankSel[0] ? DataOut1[1] : DataOut0[1] );
assign DataOut[2] = rdBankSel[1] ? (rdBankSel[0] ? DataOut3[2]:DataOut2[2]) : (rdBankSel[0] ? DataOut1[2] : DataOut0[2] );
assign DataOut[3] = rdBankSel[1] ? (rdBankSel[0] ? DataOut3[3]:DataOut2[3]) : (rdBankSel[0] ? DataOut1[3] : DataOut0[3] );

// Hit logic: the entry is valid in the meta array and the stored tag matches.
wire [17:0] checkTag = lsu_dcache_addr_i[`PADDR_W-1:14];
assign hit_vector[0]  = meta0V[bankSelIndex] && (PtagOut[0] == checkTag);
assign hit_vector[1]  = meta1V[bankSelIndex] && (PtagOut[1] == checkTag);
assign hit_vector[2]  = meta2V[bankSelIndex] && (PtagOut[2] == checkTag);
assign hit_vector[3]  = meta3V[bankSelIndex] && (PtagOut[3] == checkTag);
wire hit = |hit_vector && (cur_state == CHECK);
// Select the line of the way that hit.
wire [511:0] hit_data = ({512{hit_vector[0]}} & DataOut[0] )| 
                        ({512{hit_vector[1]}} & DataOut[1] )| 
                        ({512{hit_vector[2]}} & DataOut[2] )| 
                        ({512{hit_vector[3]}} & DataOut[3] );
// During refill (and the cycle after) the memory response is forwarded
// directly instead of the SRAM output.
wire [511:0] out_data = (refill_bypass || refill_bypassR) ? mem_dcache_rdata_i : hit_data;
// Pick the addressed 64-bit word out of the 512-bit line.
assign dcache_lsu_rdata_o = {64{(lsu_dcache_addr_i[5:3] == 3'd0)}} & out_data[64*1-1:64*0] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd1)}} & out_data[64*2-1:64*1] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd2)}} & out_data[64*3-1:64*2] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd3)}} & out_data[64*4-1:64*3] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd4)}} & out_data[64*5-1:64*4] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd5)}} & out_data[64*6-1:64*5] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd6)}} & out_data[64*7-1:64*6] | 
                            {64{(lsu_dcache_addr_i[5:3] == 3'd7)}} & out_data[64*8-1:64*7] ;

// One-cycle delayed copy of refill_bypass.
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    refill_bypassR <= 1'b0;
  else 
    refill_bypassR <= refill_bypass;

// One-cycle pulse raised when the flush walk has finished.
reg invalidateOK;
// Response to the LSU is valid when:
//   1. CHECK resolves as a hit (CHECK returning to IDLE),
//   2. a refill is in progress or finished one cycle ago,
//   3. an invalidate request has completed.
assign dcache_lsu_data_valid_o = ((cur_state == CHECK) && (nxt_state == IDLE)) || 
                                 (refill_bypass || refill_bypassR)             || 
                                  (invalidateOK);

// ---------------------------------------------------------------------------
// Write-hit handling
// ---------------------------------------------------------------------------
wire  hit_wen           = (cur_state == CHECK) && hit && lsu_dcache_wen_i;
// Expand the 8 byte strobes into a 64-bit bit mask (strobe k covers byte k).
wire [63:0]  wmask      = {{8{lsu_dcache_strb_i[7]}},
                           {8{lsu_dcache_strb_i[6]}},
                           {8{lsu_dcache_strb_i[5]}},
                           {8{lsu_dcache_strb_i[4]}},
                           {8{lsu_dcache_strb_i[3]}},
                           {8{lsu_dcache_strb_i[2]}},
                           {8{lsu_dcache_strb_i[1]}},
                           {8{lsu_dcache_strb_i[0]}}};

// Place the 64-bit mask on the addressed word of the 512-bit line.
wire [511:0] hit_wmask  = {512{(lsu_dcache_addr_i[5:3] == 3'd0)}} & (wmask<<(64*0))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd1)}} & (wmask<<(64*1))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd2)}} & (wmask<<(64*2))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd3)}} & (wmask<<(64*3))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd4)}} & (wmask<<(64*4))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd5)}} & (wmask<<(64*5))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd6)}} & (wmask<<(64*6))|
                          {512{(lsu_dcache_addr_i[5:3] == 3'd7)}} & (wmask<<(64*7));
// The write data is replicated on every word; the mask selects the target.
wire [511:0] hit_wdata  = {8{lsu_dcache_wdata_i}};

// ---------------------------------------------------------------------------
// Victim selection
// ---------------------------------------------------------------------------
// Ways of the addressed entry that are currently invalid.
wire [0:3] invalid_victor = ~{meta0V[bankSelIndex],meta1V[bankSelIndex],meta2V[bankSelIndex],meta3V[bankSelIndex]};
wire invalid = |invalid_victor;
// Tree PLRU: bit 2 picks the pair {0,1} (0) or {2,3} (1); bit 1 picks inside
// the pair {0,1}; bit 0 picks inside the pair {2,3}.
wire [0:3] plru_way = { ~plru[bankSelIndex][2] & ~plru[bankSelIndex][1] ,
                        ~plru[bankSelIndex][2] &  plru[bankSelIndex][1] ,
                         plru[bankSelIndex][2] & ~plru[bankSelIndex][0] ,
                         plru[bankSelIndex][2] &  plru[bankSelIndex][0] };

// Invalid ways are preferred over the PLRU choice.
wire [0:3] dec_way = invalid ? invalid_victor : plru_way;

// Reduce dec_way to a one-hot vector. Later assignments win, so when several
// ways are candidates the highest-numbered one is chosen; way 0 is the
// default when no bit is set.
reg [0:3]replace_way ;
always@(*)begin
  replace_way = 4'b1000;
  if(dec_way[0]) replace_way = 4'b1000;
  if(dec_way[1]) replace_way = 4'b0100;
  if(dec_way[2]) replace_way = 4'b0010;
  if(dec_way[3]) replace_way = 4'b0001;
end
// Dirty bit of the victim way.
wire dirty = (replace_way[0] & meta0D[bankSelIndex]) | 
             (replace_way[1] & meta1D[bankSelIndex]) | 
             (replace_way[2] & meta2D[bankSelIndex]) | 
             (replace_way[3] & meta3D[bankSelIndex]) ;
// Tag of the victim way (used to build the write-back address).
wire [17:0]replaceWaddr = ({18{replace_way[0]}} & PtagOut[0] ) |
                          ({18{replace_way[1]}} & PtagOut[1] ) |
                          ({18{replace_way[2]}} & PtagOut[2] ) |
                          ({18{replace_way[3]}} & PtagOut[3] ) ;

// ---------------------------------------------------------------------------
// Main FSM
// ---------------------------------------------------------------------------
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    cur_state <= IDLE;
  else 
    cur_state <= nxt_state;

always@(*)begin
  case(cur_state)
    IDLE:
      // Invalidate requests are handled by the flush FSM, not here.
      if(lsu_dcache_addr_valid_i && ~lsu_dcache_invalidate_i)
        nxt_state = CHECK;
      else  
        nxt_state = IDLE ;
    CHECK:
      if(~hit && dirty) 
        nxt_state = WRITE;  // miss, victim must be written back first
      else if(~hit && ~dirty) 
        nxt_state = READ;   // miss, victim can simply be overwritten
      else 
        nxt_state = IDLE;   // hit
    WRITE:
      if(mem_dcache_data_valid_i) 
        nxt_state = READ;
      else 
        nxt_state = WRITE;
    READ:
      if(mem_dcache_data_valid_i) 
        nxt_state = REFILL;
      else
        nxt_state = READ;
    REFILL:
      nxt_state = IDLE;
    default:
      nxt_state = IDLE;
  endcase
end

// ---------------------------------------------------------------------------
// Memory request generated by the main FSM
// ---------------------------------------------------------------------------
wire  normal_dcache_mem_addr_valid_o = (cur_state == READ) || (cur_state == WRITE);
wire  normal_dcache_mem_wen_o        = cur_state == WRITE;
// Addresses are aligned to the 512-bit line.
wire [`PADDR_W-1:0] normal_dcache_mem_addr_o  = (cur_state == READ ) ? {lsu_dcache_addr_i[`PADDR_W-1:6],6'd0} :
                                                (cur_state == WRITE) ? {replaceWaddr,bankSel,index,6'd0} :
                                                `ZERO;
wire [511:0] normal_dcache_mem_wdata_o      = ({512{replace_way[0]}} & DataOut[0] )| 
                                              ({512{replace_way[1]}} & DataOut[1] )| 
                                              ({512{replace_way[2]}} & DataOut[2] )| 
                                              ({512{replace_way[3]}} & DataOut[3] );
assign refill_bypass           = (cur_state == REFILL);

// ---------------------------------------------------------------------------
// UpdatePlru: next 3-bit tree-PLRU state after way `sel` (one-hot, index 0 is
// way 0) has been accessed. The tree is steered away from the accessed way;
// the bit belonging to the other pair is preserved. If no bit of `sel` is set
// the state is returned unchanged.
//
// The return range and the [2:0] ordering of plruVal are required: a function
// without a range returns a single bit, and a [0:2] argument would reverse
// the bit order relative to the plru[] storage.
// ---------------------------------------------------------------------------
function [2:0] UpdatePlru;
  input [0:3] sel;
  input [2:0] plruVal;
  begin
    UpdatePlru = sel[0] ? {1'b1 , 1'b1 ,  plruVal[0]} :
                 sel[1] ? {1'b1 , 1'b0 ,  plruVal[0]} :
                 sel[2] ? {1'b0 , plruVal[1] ,  1'b1} :
                 sel[3] ? {1'b0 , plruVal[1] ,  1'b0} : plruVal;
  end
endfunction

// ---------------------------------------------------------------------------
// Valid / dirty / PLRU storage, one set of flops per {bank j, index i}.
// ---------------------------------------------------------------------------
genvar i,j;
generate 
for(j=0;j<4;j=j+1)begin
  for(i=0;i<64;i=i+1)begin
    // PLRU: updated on refill (victim way) and on hit (hit way).
    always@(posedge clk or negedge rst_n)
        if(~rst_n)
          plru[i+j*64] <= 'd0;
        else if(invalidate)begin
          plru[i+j*64] <= 'd0;
        end else if((index == i) && (bankSel == j))begin
          if(refill_bypass)
            plru[i+j*64] <= UpdatePlru(replace_way,plru[i+j*64]);
          else if((cur_state == CHECK) && hit)
            plru[i+j*64] <= UpdatePlru(hit_vector,plru[i+j*64]);
        end 

    // Valid bits. invalidateOK is a synchronous clear, kept out of the
    // asynchronous reset branch so the block maps onto ordinary
    // async-reset flops.
    always@(posedge clk or negedge rst_n)
      if(~rst_n)begin
        meta0V[i+j*64] <= 1'b0;
        meta1V[i+j*64] <= 1'b0;
        meta2V[i+j*64] <= 1'b0;
        meta3V[i+j*64] <= 1'b0;
      end else if(invalidateOK)begin
        meta0V[i+j*64] <= 1'b0;
        meta1V[i+j*64] <= 1'b0;
        meta2V[i+j*64] <= 1'b0;
        meta3V[i+j*64] <= 1'b0;
      end else if((cur_state == REFILL) && (index == i) && (bankSel == j))begin
        // Refill: mark the replaced way valid, leave the others untouched.
        meta0V[i+j*64] <= replace_way[0] | meta0V[i+j*64];
        meta1V[i+j*64] <= replace_way[1] | meta1V[i+j*64];
        meta2V[i+j*64] <= replace_way[2] | meta2V[i+j*64];
        meta3V[i+j*64] <= replace_way[3] | meta3V[i+j*64];
      end

    // Dirty bits.
    always@(posedge clk or negedge rst_n)
      if(~rst_n)begin
        meta0D[i+j*64] <= 1'b0;
        meta1D[i+j*64] <= 1'b0;
        meta2D[i+j*64] <= 1'b0;
        meta3D[i+j*64] <= 1'b0;
      end else if(invalidateOK)begin
        meta0D[i+j*64] <= 1'b0;
        meta1D[i+j*64] <= 1'b0;
        meta2D[i+j*64] <= 1'b0;
        meta3D[i+j*64] <= 1'b0;
      end else if(hit_wen && (index == i) && (bankSel == j))begin
        // Write hit: mark the hit way dirty. hit_wen already implies
        // cur_state == CHECK, so no further state qualifier is needed
        // (qualifying it with IDLE would make this branch unreachable).
        meta0D[i+j*64] <= hit_vector[0] | meta0D[i+j*64];
        meta1D[i+j*64] <= hit_vector[1] | meta1D[i+j*64];
        meta2D[i+j*64] <= hit_vector[2] | meta2D[i+j*64];
        meta3D[i+j*64] <= hit_vector[3] | meta3D[i+j*64];
      end else if((cur_state == WRITE) && (index == i) && (bankSel == j))begin
        // Victim is being written back: clear its dirty bit.
        meta0D[i+j*64] <= ~replace_way[0] & meta0D[i+j*64];
        meta1D[i+j*64] <= ~replace_way[1] & meta1D[i+j*64];
        meta2D[i+j*64] <= ~replace_way[2] & meta2D[i+j*64];
        meta3D[i+j*64] <= ~replace_way[3] & meta3D[i+j*64];
      end else if ((cur_state == REFILL) && (index == i) && (bankSel == j))begin
        // Refill: the new line is dirty only when the missing request is a
        // write (the store data is merged into the refilled line below).
        meta0D[i+j*64] <= (replace_way[0] & lsu_dcache_wen_i) | meta0D[i+j*64];
        meta1D[i+j*64] <= (replace_way[1] & lsu_dcache_wen_i) | meta1D[i+j*64];
        meta2D[i+j*64] <= (replace_way[2] & lsu_dcache_wen_i) | meta2D[i+j*64];
        meta3D[i+j*64] <= (replace_way[3] & lsu_dcache_wen_i) | meta3D[i+j*64];
      end
  end
end
endgenerate

// ---------------------------------------------------------------------------
// SRAM write path (refill or write hit)
// ---------------------------------------------------------------------------
// The flush FSM takes over the SRAM address while it is running.
assign sram_addr   = flushUse ? flushSramAddr : index;
// Write hit : only the store data, limited by hit_wmask.
// Refill    : the memory line; when the missing request is a store, the store
//             data is merged into the line before it is written.
assign sram_din    = hit_wen          ? hit_wdata : 
                     lsu_dcache_wen_i ? ((mem_dcache_rdata_i & ~hit_wmask) | (hit_wdata & hit_wmask)) :
                     mem_dcache_rdata_i;
// All-ones on refill so that the whole line is written.
// NOTE(review): this assumes a 1 in BWEN enables the write of that bit in the
// SRAM macro - the macro itself is not visible here, confirm.
assign sram_wmask  = hit_wen ? hit_wmask : ~512'h0;
assign PtagIn      = lsu_dcache_addr_i[`PADDR_W-1:14];

generate
  for(i=0;i<4;i=i+1)begin
    // Way i is written when it is the refill victim or the way that hit on a
    // store.
    assign sram_wen[i]   = ((cur_state == REFILL) && replace_way[i]) || (hit_wen && hit_vector[i]);
    
    SramPtagBankx4 PtagBank(
      .clk    (clk),
      .sramEn (sram_en),

      .rdAddr  ( sram_addr ),
      .rdData0 ( PtagOut0[i] ),
      .rdData1 ( PtagOut1[i] ),
      .rdData2 ( PtagOut2[i] ),
      .rdData3 ( PtagOut3[i] ),

      .wrAddr  ( {bankSel,sram_addr} ),
      .wrEn    ( sram_wen[i]         ),
      .wrData  ( PtagIn              ) 
    );

    SramDataBankx4 DataBank(
      .clk     ( clk),
      .sramEn  ( sram_en),
      .rdAddr  ( sram_addr   ),
      .rdData0 ( DataOut0[i] ),
      .rdData1 ( DataOut1[i] ),
      .rdData2 ( DataOut2[i] ),
      .rdData3 ( DataOut3[i] ),

      .wrAddr  ( {bankSel,sram_addr} ),
      .wrEn    ( sram_wen[i]         ),
      .wrMask  ( sram_wmask          ),
      .wrData  ( sram_din            ) 
    );
  end
endgenerate

// ---------------------------------------------------------------------------
// Flush (invalidate) state machine.
// Every {way, bank, index} entry is visited in turn.
// TODO (from the original author): a more efficient scheme is possible.
// ---------------------------------------------------------------------------
assign flushWay   = flushCnt[9:8];
assign flushBank  = flushCnt[7:6];
assign flushIndex = flushCnt[5:0];
assign flushBankSelIndex = {flushBank,flushIndex};
// Valid / dirty bit and tag of the entry currently being visited.
assign curMetaV = ((flushWay == 2'd0) & meta0V[flushBankSelIndex])|
                  ((flushWay == 2'd1) & meta1V[flushBankSelIndex])|
                  ((flushWay == 2'd2) & meta2V[flushBankSelIndex])|
                  ((flushWay == 2'd3) & meta3V[flushBankSelIndex]);

assign curMetaD = ((flushWay == 2'd0) & meta0D[flushBankSelIndex])|
                  ((flushWay == 2'd1) & meta1D[flushBankSelIndex])|
                  ((flushWay == 2'd2) & meta2D[flushBankSelIndex])|
                  ((flushWay == 2'd3) & meta3D[flushBankSelIndex]);

assign curMetaTag       = ({18{flushWay == 2'd0}} & PtagOut[0])|
                          ({18{flushWay == 2'd1}} & PtagOut[1])|
                          ({18{flushWay == 2'd2}} & PtagOut[2])|
                          ({18{flushWay == 2'd3}} & PtagOut[3]);

// Only valid and dirty lines need to be written back.
assign needWB    = curMetaV && curMetaD;

always@(posedge clk or negedge rst_n)
  if(~rst_n)
    cur_flush_state <= FLUSH_IDLE;
  else 
    cur_flush_state <= nxt_flush_state;

always@(*)begin
  case(cur_flush_state)
    FLUSH_IDLE:
      // ~invalidateOK keeps a still-asserted request from restarting the
      // walk in the cycle in which completion is reported.
      if(lsu_dcache_invalidate_i && ~invalidateOK) 
        nxt_flush_state=FLUSH_READ;
      else 
        nxt_flush_state=FLUSH_IDLE;
    FLUSH_READ:
      if(flushCnt[10])          // every entry has been visited
        nxt_flush_state=FLUSH_IDLE; 
      else if(needWB) 
        nxt_flush_state=FLUSH_WRITE; 
      else 
        nxt_flush_state=FLUSH_READ;
    FLUSH_WRITE:
      if(flush_resp_valid)
        nxt_flush_state=FLUSH_READ;
      else  
        nxt_flush_state=FLUSH_WRITE;
    default:
      nxt_flush_state=FLUSH_IDLE;
  endcase
end

// Entry counter of the flush walk.
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    flushCnt <= `ZERO;
  else if(flushCnt[10])
    flushCnt <= `ZERO;
  else if((cur_flush_state == FLUSH_READ) && ~needWB) // nothing to write back for this entry: advance
    flushCnt <= flushCnt + 1'b1;
  else if ((cur_flush_state == FLUSH_WRITE) && (nxt_flush_state == FLUSH_READ)) // write-back finished: advance
    flushCnt <= flushCnt + 1'b1;

// SRAM read address while flushing.
assign flushUse         = (cur_flush_state == FLUSH_READ) || (cur_flush_state == FLUSH_WRITE);
assign flushSramAddr    = flushIndex;
// Write-back request towards memory.
assign flush_addr_valid = (cur_flush_state == FLUSH_WRITE);
assign flush_wen        = (cur_flush_state == FLUSH_WRITE);
assign flush_addr       = {curMetaTag , flushBank , flushIndex , 6'd0};
assign flush_wdata      = ({512{flushWay == 2'd0}} & DataOut[0])|
                          ({512{flushWay == 2'd1}} & DataOut[1])|
                          ({512{flushWay == 2'd2}} & DataOut[2])|
                          ({512{flushWay == 2'd3}} & DataOut[3]);

assign flush_resp_valid = mem_dcache_data_valid_i;

// Single-cycle completion pulse, raised in the cycle after the walk leaves
// FLUSH_READ for FLUSH_IDLE.
always@(posedge clk or negedge rst_n)
  if(~rst_n)
    invalidateOK <= 1'b0;
  else if((cur_flush_state == FLUSH_READ) && (nxt_flush_state == FLUSH_IDLE))
    invalidateOK <= 1'b1;
  else 
    invalidateOK <= 1'b0;

// The memory port is shared between the flush FSM and the main FSM. Per the
// original author both invalidate and normal accesses arrive through the LSU
// path, so the two are not expected to be active at the same time.
assign  dcache_mem_addr_valid_o = flushUse ? flush_addr_valid : normal_dcache_mem_addr_valid_o;
assign  dcache_mem_wen_o        = flushUse ? flush_wen        : normal_dcache_mem_wen_o       ;
assign  dcache_mem_addr_o       = flushUse ? flush_addr       : normal_dcache_mem_addr_o      ;
assign  dcache_mem_wdata_o      = flushUse ? flush_wdata      : normal_dcache_mem_wdata_o     ; 

endmodule


// -----------------------------------------------------------------------------
// SramDataBankx4 : four DcacheData64x512bit SRAM instances (one per bank)
// behind a single read/write port.
//
//   clk      clock forwarded to every SRAM
//   sramEn   enable; forwarded inverted to the CEN pins
//   rdAddr   6-bit read address, applied to all four banks
//   rdData0..3  read data of bank 0..3
//   wrAddr   {bank[1:0], index[5:0]} of the line to write
//   wrEn     write request; only the bank named by wrAddr[7:6] is written
//   wrMask   forwarded unchanged to the BWEN pins
//   wrData   512-bit write data
//
// NOTE(review): CEN/WEN are driven with inverted enables, so they are
// presumably active-low on the macro; the macro is not visible here.
// -----------------------------------------------------------------------------
module SramDataBankx4(
  input clk ,
  input sramEn,

  input  [5:0]   rdAddr,
  output [511:0] rdData0,
  output [511:0] rdData1,
  output [511:0] rdData2,
  output [511:0] rdData3,

  input  [7:0]   wrAddr,
  input          wrEn,
  input  [511:0] wrMask,
  input  [511:0] wrData 
);

// All banks share one address: the write index while a write is requested,
// the read index otherwise.
wire [5:0] sramAddr = wrEn ? wrAddr[5:0] : rdAddr[5:0];
wire [511:0] sramOut [0:3];

genvar i;
generate
  // `i=i+1` rather than `i++` keeps the loop legal in plain Verilog and
  // matches the generate loops used elsewhere in this file.
  for(i=0;i<4;i=i+1)begin : DataBank
    DcacheData64x512bit data(
    .CLK  ( clk       ),
    .CEN  ( ~sramEn    ),
    .WEN  ( ~(wrEn && (i == wrAddr[7:6]))), // write only the addressed bank
    .BWEN ( wrMask   ),
    .A    ( sramAddr  ),
    .D    ( wrData    ), 
    .Q    ( sramOut[i])
  );
  end
endgenerate

assign rdData0 = sramOut[0]; 
assign rdData1 = sramOut[1]; 
assign rdData2 = sramOut[2]; 
assign rdData3 = sramOut[3]; 

endmodule


// -----------------------------------------------------------------------------
// SramPtagBankx4 : four DcachePtag64x18bit SRAM instances (one per bank)
// behind a single read/write port.
//
//   clk      clock forwarded to every SRAM
//   sramEn   enable; forwarded inverted to the CEN pins
//   rdAddr   6-bit read address, applied to all four banks
//   rdData0..3  18-bit tag read from bank 0..3
//   wrAddr   {bank[1:0], index[5:0]} of the tag to write
//   wrEn     write request; only the bank named by wrAddr[7:6] is written
//   wrData   18-bit tag to store
//
// NOTE(review): CEN/WEN are driven with inverted enables, so they are
// presumably active-low on the macro; the macro is not visible here.
// -----------------------------------------------------------------------------
module SramPtagBankx4(
  input clk ,
  input sramEn,

  input  [5:0]   rdAddr,
  output [17:0]  rdData0,
  output [17:0]  rdData1,
  output [17:0]  rdData2,
  output [17:0]  rdData3,

  input  [7:0]   wrAddr,
  input          wrEn,
  input  [17:0]  wrData 
);

// All banks share one address: the write index while a write is requested,
// the read index otherwise.
wire [5:0]  sramAddr = wrEn ? wrAddr[5:0] : rdAddr[5:0];
wire [17:0] sramOut [0:3];

genvar i;
generate
  // `i=i+1` rather than `i++` keeps the loop legal in plain Verilog and
  // matches the generate loops used elsewhere in this file.
  for(i=0;i<4;i=i+1)begin : TagBank
    DcachePtag64x18bit tag(
    .CLK  ( clk       ),
    .CEN  ( ~sramEn    ),
    .WEN  ( ~(wrEn && (i == wrAddr[7:6]))), // write only the addressed bank
    .BWEN ( ~18'd0    ),                    // tags are always written in full
    .A    ( sramAddr  ),
    .D    ( wrData    ), 
    .Q    ( sramOut[i])
  );
  end
endgenerate

assign rdData0 = sramOut[0]; 
assign rdData1 = sramOut[1]; 
assign rdData2 = sramOut[2]; 
assign rdData3 = sramOut[3]; 

endmodule